Purpose

Publicly available mouse and human datasets of pre/post gene expression in skeletal muscle were used to project mouse patterns onto human data using ProjectR package. The end goal is to find latent variables in mouse gene expression to project onto human data to make mouse data better recapitulate human data.
Built with R 4.0.5
Lab notebook
Wrike project

Data
Mouse Data: GSE97718
* 16 samples
* Control diet (n=8) high fat diet (n=8)
* Pre and post exercise samples collected from each mouse
* RNA isolated from quadricep skeletal muscle
* fastq’s downloaded from ENA
* data preprocessed with RNA-seq STAR pipeline by Lara Ianov

Human Data: GSE108643
* 59 samples
* Participants divided by BMI: lean ((BMI<25, 18.5- 24.1 kg/m2, n=15) and Ov/Ob (BMI≥25, 25.5- 36.9 kg/m2, n=15)
* Pre and post exercise samples collected from each participant
* RNA isolated from vastus lateralis muscle biopsies
* fastq’s downloaded from ENA
* data preprocessed with RNA-seq STAR pipeline by Lara Ianov

System which operations were done on: MacBook Pro (16-inch, 2019), Processor 2.4 GHz 8-Core Intel Core i9, Memory 64 GB 2667 MHz DDR4, Graphics AMD Radeon Pro 5300M 4 GB Intel UHD Graphics 630 1536 MB

Read in Mouse Data

#Load in raw counts for Mouse pre/post exercise experiment
mouse_counts <- read.delim("/Users/eramsey/Desktop/R21_210302/PreliminaryR21/mouse210321_fixed_raw_counts.txt", header = TRUE, row.names = 1, sep = "\t", stringsAsFactors = FALSE)
mouse_counts <- mouse_counts[, -c(1:3)]

Make Mouse Metadata Table
Metadata/info table not supplied with this study. Using GEO, manually created (see here https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE97718)

#metadata not included with this dataset.. 
mouse_info <- read.csv("MouseSraRunTable.txt", header = TRUE, sep = ",")
Name <- c("CON_pre4","HFD_pre2","HFD_pre3","HFD_pre4","CON_post1","CON_post2","CON_post3","CON_post4","HFD_post4","CON_pre3","CON_pre2","HFD_post2","HFD_post3","HFD_pre1","CON_pre1","HFD_post1")
mouse_info <- cbind(mouse_info, Name)
#making columns specific to condition (diet and exercise)
mouse_info <- mutate(mouse_info, Diet = ifelse(grepl("HFD", Name, ignore.case = TRUE), "HFD", "CON"))
mouse_info <- mutate(mouse_info, Exercise = ifelse(grepl("pre", Name, ignore.case = TRUE), "pre", "post"))
mouse_info <- column_to_rownames(mouse_info, var = "Name")

Read in Human Data

#HUMAN data read in
human_data <- read.delim("/Users/eramsey/Desktop/R21_210302/PreliminaryR21/human_fixed_raw_counts.txt", header = TRUE, row.names = 1, sep = "\t")
human_data <- human_data[, -c(1:3)]
human_info <- read.csv("human_info.csv", header = TRUE, sep = ",")
human_info <- human_info[c(1:58),]
#make new label names for human samples
human_info_test <- unite(human_info, col = Name, BMI, PrePost, sep = "_", remove = FALSE)

numbered_list <- c(1:14, 1:15, 1:14, 1:15)
names <- paste(human_info_test$Name, numbered_list, sep = "_")
human_info_test[["Names"]] <- names
human_info <- column_to_rownames(human_info_test, var = "Names")

Normalization for Mouse

DESeq2 was used to normalize data

mouse_dds <- DESeqDataSetFromMatrix(mouse_counts, colData = mouse_info, design = ~ Exercise)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
mouse_dds <- DESeq(mouse_dds)
#Contrast for log fold change, pre = numerator, post = denominator
mouse_res <- results(mouse_dds, contrast = c("Exercise", "post", "pre"))
write.csv(as.data.frame(mouse_res), file = "Mouse_DESeq2_results.csv")

mouse_vsd <- vst(mouse_dds, blind = FALSE)
mouse_deseq2_pca <- DESeq2::plotPCA(mouse_vsd, intgroup = "Exercise", )
mouse_deseq2_pca

Normalization for Human

DESeq2 was used to normalize data

#HUMAN normalization
human_dds <- DESeqDataSetFromMatrix(human_data, colData = human_info, design = ~ PrePost)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
human_dds <- DESeq(human_dds)
human_res <- results(human_dds, contrast = c("PrePost", "Post", "Pre"))
write.csv(human_res, file = "210518_Human_DESeq_Res.csv")
human_vsd <- vst(human_dds, blind = FALSE)

# trying to get lfc in order
human_de <- cbind(LFC = human_res$log2FoldChange, Pval = human_res$pvalue)
row.names(human_de) <- row.names(human_res)
human_de_df <- as.data.frame(human_de)
human_de_ordered <- human_de_df[order(-human_de_df$LFC), , drop = FALSE]
human_vsd_matrix <- assay(human_vsd)
human_deseq2_pca <- plotPCA(human_vsd, intgroup = "PrePost")
human_deseq2_pca

rownames(human_vsd_matrix) <- sub("\\..*", "", rownames(human_vsd_matrix))
#back to df and make ensembl gene rownames into a column
human_vsd_matrix <- human_vsd_matrix %>% as.data.frame() %>% rownames_to_column(., "ENSEMBL")

Annotation

Mouse Gene ID Conversion

#preparing outputs for conversion
mouse_vsd_matrix <- assay(mouse_vsd)
rownames(mouse_vsd_matrix) <- sub("\\..*", "", rownames(mouse_vsd_matrix))
mouse_vsd2 <- mouse_vsd_matrix %>% as.data.frame() %>% rownames_to_column(., "ENSEMBL")
write.csv(mouse_info, file= "New_Mouse_Metadata")

AnnotationDbi for Mouse Symbols
AnnotationDbi::idConverter can convert from one ID type of a species and map to orthologous genes and convert to another gene ID type… However, for human, it uses ‘hom.Mm.inp.db’ and this database is deprecated and no longer works on Bioconductor past version 3.12
Just leaving this note because because a functioning version of this would be wonderful

Lengthy process to convert ID’s, remove duplicates, and remove NA’s, see more info here

#annotation
library(org.Mm.eg.db)
#retrieve conversion info from one ID type to another
mouse_anno <- AnnotationDbi::select(org.Mm.eg.db, keys = rownames(mouse_vsd_matrix), columns = c("SYMBOL", "GO"),keytype = "ENSEMBL")
#determine indices for non-NA genes
mousenon_na_symbols <- which(is.na(mouse_anno$SYMBOL) == FALSE)
#return only the genes with annotations using indices
mouse_anno <- mouse_anno[mousenon_na_symbols, ]
#determine indices for non-duplicated genes
mouseno_dups_symbols <- which(duplicated(mouse_anno$SYMBOL) == FALSE)
#return only non-dup genes using indices
mouse_anno <- mouse_anno[mouseno_dups_symbols, ]
#add symbols to normalized mouse data
mouse_symbol <- inner_join(mouse_anno, mouse_vsd2, by = "ENSEMBL")
#has GO annotation as well as the other gene ID info
mouse_symbol_GO <- column_to_rownames(mouse_symbol, var = "SYMBOL")
#removing annotations, symbols for row names, format for analysis
#!!! this may impact analysis???
#mouse_symbol <- mouse_symbol_GO[,c(5:20)] #THERE WERE 22 COLUMNS AND I CUT OFF TWO OF THEM WITH THIS!!
mouse_symbol <- mouse_symbol_GO[, !names(mouse_symbol_GO) %in% c("ENSEMBL", "GO", "EVIDENCE", "ONTOLOGY")]

AnnotationDbi for Human Symbols

#annotation
#retrieve conversion info from one ID type to another
symbols_human <- AnnotationDbi::select(org.Hs.eg.db, keys = human_vsd_matrix$ENSEMBL, columns = c("SYMBOL", "GO"),keytype = "ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
#determine indices for non-NA genes
non_na <- which(is.na(symbols_human$SYMBOL) == FALSE)
#return only the genes with annotations using indices
symbols_human <- symbols_human[non_na, ]
#determine indices for non-duplicated genes
no_dups_human <- which(duplicated(symbols_human$SYMBOL) == FALSE)
#return only non-dup genes using indices
symbols_human <- symbols_human[no_dups_human, ]
human_symbol_data <- inner_join(symbols_human, human_vsd_matrix, by = "ENSEMBL")
#has GO annotation as well as the other gene ID info
human_symbol_data_GO <- column_to_rownames(human_symbol_data, var = "SYMBOL")
#removing annotations, symbols for row names, format for analysis
#commented out previous method: not as reproducible
#human_symbol_data <- human_symbol_data_GO[,5:62]
human_symbol_data <- human_symbol_data_GO[, !names(human_symbol_data_GO) %in% c("ENSEMBL", "GO", "EVIDENCE", "ONTOLOGY")]

Convert Mouse to Human Orthologous Symbols

To be able to compare gene expression data from mouse to human, must convert first to orthologous genes

## Basic function to convert mouse to human gene names
convertMouseGeneList <- function(x){
require("biomaRt")
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl")
genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = x , mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows=T)
return(genesV2)
}
#Use convertMouseGeneList to convert to human genes
mouse_to_human_genes <- convertMouseGeneList(mouse_anno$SYMBOL)
conv_mouse <- mouse_symbol %>% rownames_to_column(., var = "MGI.symbol") %>% left_join(., mouse_to_human_genes, by = "MGI.symbol")
#determine non-NA genes
non_na_mouse <- which(is.na(conv_mouse$HGNC.symbol) == FALSE)
#return only the genes with annotations using indices
conv_mouse <- conv_mouse[non_na_mouse, ]
#determine indices for non-duplicated genes
no_dups_mouse <- which(duplicated(conv_mouse$HGNC.symbol) == FALSE)
#return only non-dup genes using indices
conv_mouse <- conv_mouse[no_dups_mouse, ]
#set rownames to NULL to be able to set new rownames with HGNC.symbol
rownames(conv_mouse) <- NULL 
conv_mouse <- conv_mouse %>% as.data.frame() %>% column_to_rownames(., var = "HGNC.symbol") 
# originally subsetted like this vv but it's risk and less reproducible
#conv_mouse <- conv_mouse[,c(2:17)]
conv_mouse <- conv_mouse[, !names(conv_mouse) %in% c("MGI.symbol")]

NMF Using CoGAPS for Mouse

How many patterns? Might need to finish FEA to figure out.

#MOUSE NMF
#parameters for GoGAPS
params <- new("CogapsParams")
params <- setParam(params, "seed", 1000)
params <- setParam(params, "nPatterns", 9)
#CoGAPS to find patterns in the data
#set parameters for number of patterns

AP_mouse <- CoGAPS(conv_mouse, params, nIterations = 5000)
## 
## This is CoGAPS version 3.10.0 
## Running Standard CoGAPS on conv_mouse (18200 genes and 16 samples) with parameters:
## 
## -- Standard Parameters --
## nPatterns            9 
## nIterations          5000 
## seed                 1000 
## sparseOptimization   FALSE 
## 
## -- Sparsity Parameters --
## alpha          0.01 
## maxGibbsMass   100
getMeanChiSq(AP_mouse)
## [1] 33413.22

For Loop for Chi Square Matrix from CoGAPS Runs

NMF Using CoGAPS for Human

#CoGAPS to find patterns in the data
AP_human <- CoGAPS(human_symbol_data, params, nIterations = 5000)
## 
## This is CoGAPS version 3.10.0 
## Running Standard CoGAPS on human_symbol_data (35170 genes and 58 samples) with parameters:
## 
## -- Standard Parameters --
## nPatterns            9 
## nIterations          5000 
## seed                 1000 
## sparseOptimization   FALSE 
## 
## -- Sparsity Parameters --
## alpha          0.01 
## maxGibbsMass   100
getMeanChiSq(AP_human)
## [1] 523937
plot(AP_human)

Boxplot for Patterns in Mouse

Mouse Pattern Violin Plots

mouse_featureLoadings <- AP_mouse@featureLoadings %>% as.data.frame() 
boxplot_loadings <- boxplot(mouse_featureLoadings, xlab = "NMF Patterns", ylab = "Amplitude", las = 2, main = "Mouse Feature Loadings")

Base R Boxplot for Patterns in Human

human_samplefactors <- AP_human@sampleFactors %>% as.data.frame() 
#%>% rownames_to_column(., var = "Sample")
human_samplefactors <- cbind(human_samplefactors, Exercise = human_info$PrePost)

#split into two matrices, pre and post, and then use add to "add boxplot to current plot"
#group pre/post exercise into two different matrices, and then combine them into a boxplot
pre_exercise_human <- dplyr::filter(human_samplefactors, Exercise == "Pre")
pre_exercise_human <- subset(pre_exercise_human, select = -Exercise)
post_exercise_human <- dplyr::filter(human_samplefactors, Exercise == "Post")
post_exercise_human <- subset(post_exercise_human, select = -Exercise)
#base r boxplot
boxplot_factors_post_human <- boxplot(post_exercise_human, las = 2, ylab = "Sample Factors", main = "Human Post Exercise Patterns", col = "powderblue")
stripchart(post_exercise_human, vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'black') #add data points

boxplot_factors_pre_human <- boxplot(pre_exercise_human, las = 2, ylab = "Sample Factors", main = "Human Pre Exercise Patterns", col = "plum2")
stripchart(pre_exercise_human, vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'black')

ggPlot Boxplot for Patterns in Human

human_sampleFactors <- AP_human@sampleFactors

human_sampleFactors_long <- human_sampleFactors %>% as.data.frame() %>% rownames_to_column(., var = "Sample") %>% pivot_longer( cols = -Sample, names_to = "Patterns", values_to = "Factors")  %>% mutate(Exercise = ifelse(grepl("Pre", Sample, ignore.case = TRUE), "Pre", "Post")) %>% unite( col = "Exercise_Pattern", Patterns, Exercise, sep = "_", remove = FALSE)

human_NMF_boxplot <- ggplot(human_sampleFactors_long, aes(x = Exercise_Pattern, y = Factors)) +
  geom_boxplot(color = "gray60") +
  geom_point(aes(color = Exercise)) +
  geom_jitter(size = 2, alpha = 0.25, width = 0.2) +
  scale_colour_viridis_d() +
  labs(x = "NMF Patterns", y = "Sample Weights") + 
  theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 16), axis.title = element_text(size = 16), title = element_text(size = 18), legend.text = element_text(size = 16)) +
  theme(plot.margin = unit(c(1,1,1.5,1.2),"cm")) + #top, right, bottom, left
    labs(title = "NMF Patterns in Pre/Post Exercise Samples")
human_NMF_boxplot

Mouse NMF Heatmap

Heatmap set up the same as ProjectR vignette, using hierachical clustering

NMF_mouse <-heatmap.2(as.matrix(AP_mouse),col=viridis(100), trace='none',
                 distfun=function(c) as.dist(1-cor(t(c))) ,
                 cexCol=1,cexRow=.6,scale = "row", main = "Mouse NMF", labRow = rownames(conv_mouse), xlab = "Patterns",
                 hclustfun=function(x) hclust(x, method="average"))

Mouse NMF ggPlot Heatmap - Data Wrangling

mouse_featloadings <- AP_mouse@featureLoadings

#estimate variance for each row 
var_mouse_featloadings <- apply(mouse_featloadings, 1, var)
#top 100 most variable genes
top100_var_mouse_featloadings_names <- names(sort(var_mouse_featloadings, decreasing = TRUE))[1:100]
head(top100_var_mouse_featloadings_names)
## [1] "UBE2C"   "RABEP2"  "TLCD3B"  "TSTD2"   "POLR3H"  "CCDC127"

Mouse - Highest feature/gene weight averages

#to look at the highest weighted genes, going to select genes with highest averages across patterns
avg_mouse_featloadings <- apply(mouse_featloadings, 1, mean)

top75_mouse_featloadings_names <- names(sort(avg_mouse_featloadings, decreasing = TRUE))[1:75]
head(top75_mouse_featloadings_names)
## [1] "RABEP2"  "UBE2C"   "TLCD3B"  "POLR3H"  "MAP3K15" "TSTD2"

Mouse data wrangling

#data wrangling
top75_mouse_featloadings <- mouse_featloadings[top75_mouse_featloadings_names,]
top75_mouse_featloadings <- as.data.frame(top75_mouse_featloadings)
top75_mouse_featloadings <- rownames_to_column(top75_mouse_featloadings, var = "genes")
dim(top75_mouse_featloadings)
## [1] 75 10
head(top75_mouse_featloadings)
##     genes  Pattern_1 Pattern_2 Pattern_3 Pattern_4 Pattern_5 Pattern_6
## 1  RABEP2 0.01983772  2.230480  2.168315  2.404643  2.048885  1.338395
## 2   UBE2C 0.05603908  1.818706  1.650107  2.536131  2.197390  1.658526
## 3  TLCD3B 0.05460929  2.058435  1.727842  1.579264  1.782673  1.802371
## 4  POLR3H 0.03003079  1.273096  1.445761  1.613505  1.472139  1.207789
## 5 MAP3K15 0.06058228  1.170291  1.451192  1.810609  1.619943  1.126586
## 6   TSTD2 0.06705469  1.428910  1.599869  1.297245  1.245230  1.212494
##   Pattern_7 Pattern_8 Pattern_9
## 1  2.414575 0.7157919  2.704867
## 2  2.030578 0.4868053  3.071728
## 3  2.551762 0.3860395  1.918536
## 4  1.695494 0.3991383  2.013255
## 5  1.330572 0.5106807  1.855779
## 6  1.202255 0.2561178  2.162539

Mouse Heatmap
https://jcoliver.github.io/learn-r/009-expression-heatmaps.html

mouse_featloadings_long <- pivot_longer(data = top75_mouse_featloadings, 
                         cols = -genes,
                         names_to = "pattern", 
                         values_to = "feat_weight")
head(mouse_featloadings_long)
## # A tibble: 6 × 3
##   genes  pattern   feat_weight
##   <chr>  <chr>           <dbl>
## 1 RABEP2 Pattern_1      0.0198
## 2 RABEP2 Pattern_2      2.23  
## 3 RABEP2 Pattern_3      2.17  
## 4 RABEP2 Pattern_4      2.40  
## 5 RABEP2 Pattern_5      2.05  
## 6 RABEP2 Pattern_6      1.34
mouse_featloadings_long$log.weight <- log(mouse_featloadings_long$feat_weight)

Mouse NMF Amplitude Matrix Heatmap in ggplot

Heatmap visualization of Amplitude/features/gene weight matrix (CoGAPS output)

mouse_featloadings_heatmap <- ggplot(mouse_featloadings_long, mapping = aes(x = pattern, y = genes, fill = log.weight)) + geom_tile() + xlab(label = "Patterns") + ylab(label = "Genes") + theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
mouse_featloadings_heatmap

ggsave("210618_NMF_mouse_featloadings_heatmap.pdf", mouse_featloadings_heatmap)

Human NMF Heatmap

Heatmap visualization of Amplitude/features/gene weight matrix (CoGAPS output)

##ggplot Heatmap
human_featloadings <- AP_human@featureLoadings

#estimate variance for each row 
var_human_featloadings <- apply(human_featloadings, 1, var)
#top 100 most variable genes
top20_var_human_featloadings_names <- names(sort(var_human_featloadings, decreasing = TRUE))[1:20] 
head(top20_var_human_featloadings_names)
## [1] "TTN-AS1"      "LOC101927055" "ND6"          "RIF1"         "TMEM30A-DT"  
## [6] "ATP2A1-AS1"
top20_var_human_featloadings_matrix <- human_featloadings[top20_var_human_featloadings_names,]

Human NMF Heatmap
Clustered heatmap of human NMF feature/amplitude/gene matrix, selecting for top 20 gene weights with greatest variance amongst patterns

#using wide-form of data to cluster
#top20_human_matrix <- as.numeric(top20_human_featloadings)

#png(file = "/Users/eramsey/Desktop/R21_210302/PreliminaryR21/210706_Human_NMF_Variance_GeneWeights.png")  
heatmap.2(top20_var_human_featloadings_matrix,col=viridis, trace='none', margin=c(11,11),
distfun=function(c) as.dist(1-cor(t(c))) , xlab = "NMF Patterns", 
srtCol = 45, cexCol=2,cexRow=1.7, scale = "row",
hclustfun=function(x) hclust(x, method="average"), main = "Most Variable Genes Across NMF Patterns", cex.main = 6
)

#dev.off()

Human - highest average weighted genes

#to look at the highest weighted genes, going to select genes with highest averages across patterns
avg_human_featloadings <- apply(human_featloadings, 1, mean)

top20_human_featloadings_names <- names(sort(avg_human_featloadings, decreasing = TRUE))[1:20]
head(top20_human_featloadings_names)
## [1] "LOC101927055" "TTN-AS1"      "ND6"          "RIF1"         "ND5"         
## [6] "LOC105377590"

Human - data wrangling

#data wrangling
top20_human_featloadings_matrix <- human_featloadings[top20_human_featloadings_names,]
top20_human_featloadings <- as.data.frame(top20_human_featloadings_matrix)
top20_human_featloadings <- rownames_to_column(top20_human_featloadings, var = "genes")
dim(top20_human_featloadings)
## [1] 20 10
head(top20_human_featloadings)
##          genes Pattern_1  Pattern_2 Pattern_3 Pattern_4 Pattern_5 Pattern_6
## 1 LOC101927055 0.9434009 0.06260934 0.3319990  5.652393 0.2224951 0.4111889
## 2      TTN-AS1 0.4823115 0.08893458 0.2780286  6.151776 0.1403035 0.1835457
## 3          ND6 0.4840103 0.07370391 0.2058961  6.440374 0.1127677 0.3561757
## 4         RIF1 0.9708761 0.05942313 0.1315845  5.017206 0.0838132 0.2022305
## 5          ND5 1.4101026 0.11987590 0.3140898  3.539114 0.1579720 0.3508668
## 6 LOC105377590 0.7213866 0.07830001 0.3563071  4.129803 0.1790764 0.3498320
##   Pattern_7 Pattern_8  Pattern_9
## 1  6.652137 0.2572868 0.07669174
## 2  6.941866 0.2912224 0.04211363
## 3  5.250072 0.2038665 0.04429376
## 4  4.866413 0.0732412 0.04299175
## 5  4.614510 0.2301526 0.02281302
## 6  4.097141 0.2954546 0.05812084

Human - data wrangling
https://jcoliver.github.io/learn-r/009-expression-heatmaps.html

human_featloadings_long <- pivot_longer(data = top20_human_featloadings, 
                         cols = -genes,
                         names_to = "pattern", 
                         values_to = "feat_weight")
head(human_featloadings_long)
## # A tibble: 6 × 3
##   genes        pattern   feat_weight
##   <chr>        <chr>           <dbl>
## 1 LOC101927055 Pattern_1      0.943 
## 2 LOC101927055 Pattern_2      0.0626
## 3 LOC101927055 Pattern_3      0.332 
## 4 LOC101927055 Pattern_4      5.65  
## 5 LOC101927055 Pattern_5      0.222 
## 6 LOC101927055 Pattern_6      0.411
human_featloadings_long$log.weight <- log(human_featloadings_long$feat_weight)
head(human_featloadings_long)
## # A tibble: 6 × 4
##   genes        pattern   feat_weight log.weight
##   <chr>        <chr>           <dbl>      <dbl>
## 1 LOC101927055 Pattern_1      0.943     -0.0583
## 2 LOC101927055 Pattern_2      0.0626    -2.77  
## 3 LOC101927055 Pattern_3      0.332     -1.10  
## 4 LOC101927055 Pattern_4      5.65       1.73  
## 5 LOC101927055 Pattern_5      0.222     -1.50  
## 6 LOC101927055 Pattern_6      0.411     -0.889

Human NMF Heatmap Human Gene/Feature NMF Matrix Heatmap in ggplot, selecting for top highest weightest averages for genes amonst all patterns

human_featloadings_heatmap <- ggplot(human_featloadings_long, mapping = aes(x = pattern, y = genes, fill = log.weight)) + 
  geom_tile() +
  labs(title = "Human NMF Feature Weights") +
  xlab(label = "Patterns") + 
  ylab(label = "Top Weighted Genes") + 
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5), axis.text.y = element_text(size = 5)) + 
  scale_fill_viridis(option = "plasma")
human_featloadings_heatmap

NMF Projection: Mouse onto Human

The mouse NMF patterns can then be projected onto the human data
Scatterplot to compare patterns

human_data <- as.matrix(human_symbol_data)
human_NMF <- projectR(human_data,loadings=AP_mouse, full=TRUE,
                     dataNames=rownames(human_data))
## [1] "18108 row names matched between data and loadings"
## [1] "Updated dimension of data: 18108 58"
#pval_NMF <- human_NMF$pval
projection_NMF <- human_NMF$projection %>% t() %>% cbind(., human_info)

#plot scatterplot
projection_plot <- ggplot(projection_NMF, aes(x = Pattern_3, y = Pattern_9, colour = PrePost)) + geom_point() + ggtitle("Mouse Data NMF Patterns Projected onto Human Data")
projection_plot

#ggsave("NMF_Mouse_Projected_to_Human_14pat5000it.pdf", projection_plot)
ggsave("210628_Scatterplot_Projected_MousetoHuman_Patterns3and9.pdf", projection_plot)
## Saving 7 x 5 in image

PCA to Compare Projected Patterns

#PCA
projection_NMF_mouse2human_pca <- prcomp(human_NMF$projection)
projection_NMF_mouse2human_pca_plot <- ggplot(as.data.frame(projection_NMF_mouse2human_pca$x), aes(x=PC1,y=PC2, colour = "red", "blue")) +geom_point()
#projection_NMF_mouse2human_pca_plot

pc_human <- prcomp(t(human_symbol_data))
pc_mouse <- prcomp(t(conv_mouse))
#find variance
pc_var_mouse <- round(((pc_mouse$sdev)^2/sum(pc_mouse$sdev^2))*100,2)
pca_mouse_df <- data.frame(cbind(pc_mouse$x, mouse_info))

Boxplot to Compare Pre/Post Projected Patterns

NMF_projection_long <- projection_NMF %>% rownames_to_column(., var = "Sample") %>% pivot_longer(starts_with("Pattern_"), names_to = "Patterns", values_to = "Factors") %>% unite( col = "Exercise_Pattern", Patterns, PrePost, sep = "_", remove = FALSE)


projection_NMF_boxplot <- ggplot(NMF_projection_long, aes(x = Exercise_Pattern, y = Factors)) +
  geom_boxplot(color = "gray60") +
  geom_point(aes(color = PrePost)) +
  geom_jitter(size = 2, alpha = 0.25, width = 0.2) +
  scale_colour_viridis_d() +
  labs(x = "Projected Pre and Post Exercise Patterns", y = "Factors") + 
  theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 45, vjust = 0.5)) +
    labs(title = "Projected Mouse NMF Pre/Post Exercise Patterns onto Human Data")
projection_NMF_boxplot

Scatterplot to Compare Pre/Post Projected Patterns

projection_NMF_scatterplot <- ggplot(NMF_projection_long, aes(x = Patterns, y = Factors)) +
  geom_point(aes(color = PrePost)) +
  geom_jitter(size = 2, alpha = 0.5, width = 0.2) +
  scale_colour_viridis_d() +
  labs(x = "Projected Pre and Post Exercise Patterns", y = "Factors") + 
  theme(panel.grid = element_blank(), axis.text.x = element_text(angle = 45, vjust = 0.5)) +
    labs(title = "Projected Mouse NMF Pre/Post Exercise Patterns onto Human Data")
projection_NMF_scatterplot

NMF Projection: Human onto Mouse

mouse_NMF <- projectR(as.matrix(conv_mouse),loadings=AP_human, full=TRUE,
                     dataNames=rownames(conv_mouse))
## [1] "18108 row names matched between data and loadings"
## [1] "Updated dimension of data: 18108 16"
projection_NMF_human2mouse <- mouse_NMF$projection %>% t() %>% cbind(., mouse_info)

#plot scatterplot
projection_plot_human2mouse <- ggplot(projection_NMF_human2mouse, aes(x = Pattern_6, y = Pattern_9, colour = Exercise)) + geom_point() + ggtitle("Mouse Data NMF Patterns Projected onto Human Data")
projection_plot_human2mouse

PC Projection

Another ProjectR method to projections. Principal components are found for mouse and human data. Mouse PC’s are used for loadings and projected onto human data.

Make PC’s

pc_human <- prcomp(t(human_symbol_data))
pc_mouse <- prcomp(t(conv_mouse))
#find variance
pc_var_mouse <- round(((pc_mouse$sdev)^2/sum(pc_mouse$sdev^2))*100,2)
pca_mouse_df <- data.frame(cbind(pc_mouse$x, mouse_info))

Project PC’s

PCA_projectr <- projectR(human_data, loadings = pc_mouse, full = TRUE, dataNames = rownames(human_data))
## [1] "18108 row names matched between data and loadings"
## [1] "Updated dimension of data: 18108 58"
human_meta <- column_to_rownames(human_info_test, var = "SRR")
human_meta_t <- t(human_meta)
PCA_projectr_t <- t(PCA_projectr[[1]])
PCA_projectr_df <- cbind(PCA_projectr_t, human_meta)
#[1] "18282 row names matched between data and loadings"
#[1] "Updated dimension of data: 18282 58"

#Plot PCA
dPCA <- data.frame(cbind(t(PCA_projectr[[1]]),PCA_projectr_df))
projected_PCA <- ggplot(PCA_projectr_df, aes(x = PC1, y = PC2, colour = PrePost)) + 
  geom_point(size = 3.5) + 
  scale_color_viridis_d() +
  theme(panel.grid = element_blank(), axis.text = element_text(size = 16), axis.title = element_text(size = 16), title = element_text(size = 18), legend.text = element_text(size = 16)) +
  ggtitle("Mouse Principal Components \nProjected onto Human Data")
projected_PCA

Clustering Projection

There’s 2 methods: cluster2pattern finds correlation of each gene’s expression by mean of cluster to define continuous weights. Then intersector tests significant overlap between 2 clustering objects. For these purposes, I think cluster2pattern is better for finding what we want (differences between clusters rather than overlaps). Requires a clustering object first.

#TOP 50 DIFFERENTIALLY EXPRESSED GENES
#checking standard deviation for each gene
stdevs <- apply(conv_mouse, 1, sd)
mouse_stdev <- cbind(conv_mouse, stdevs)
mouse_stdev <- slice_max(mouse_stdev, order_by = stdevs, n = 250)
mouse_top250 <- mouse_stdev[,-17]
write.csv2(mouse_top250, file = "Mouse_Top250_DEG.csv")
library(fpc)
#pamk: partitioning around medoids clustering with the number of clusters estimated by optimum average silhouette width 
#part_clust_mouse <- pamk(stdevs, scaling = TRUE)
#plot(part_clust_mouse$pamobject)

#  Hierarchical Clustering
mouse_dist <- dist(mouse_top250, method = "euclidean") # distance matrix 
hclust_fit <- hclust(mouse_dist, method="average")
#  Hierarchical Clustering with Bootstrapped p values
library(pvclust)
#pvclust() provides p-values for hierarchical clustering based on multiscale bootstrap resampling-clusters that are highly supported by the data will have large p values.
#pvclust clusters columns, not rows: transpose your data before using
fit <- pvclust(mouse_top250, method.hclust="average",
   method.dist="euclidean")
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
hclust_dendro_sig <- plot(fit) 
pvrect(fit, alpha=.90) # add rectangles around groups highly supported by the data

Clustering in Human Data
Hierarchical Clustering with Bootstrapped p values
pvclust() provides p-values for hierarchical clustering based on multiscale bootstrap resampling-clusters that are highly supported by the data will have large p values. (Note: pvclust clusters columns, not rows: transpose your data before using)

#TOP 50 DIFFERENTIALLY EXPRESSED GENES
human_stdevs <- apply(human_symbol_data, 1, sd)
human_stdev <- cbind(human_symbol_data, human_stdevs)
human_stdev <- slice_max(human_stdev, order_by = human_stdevs, n = 50)
human_top250 <- human_stdev[,-17]
write.csv2(human_top250, file = "Human_Top50_DEG.csv")


fit_human <- pvclust(human_top250, method.hclust="average",
   method.dist="euclidean")
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.8)... Done.
## Bootstrap (r = 0.9)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.3)... Done.
## Bootstrap (r = 1.4)... Done.
human_hclust_dendro_sig <- plot(fit_human) 

#pvrect(fit, alpha=.90) # add rectangles around groups highly supported by the data

Output of the cluster2pattern function is a pclust class object; specifically, a matrix of genes (rows) by clusters (columns). A gene’s value outside of its assigned cluster is zero. For the cluster containing a given gene, the gene’s value is the correlation of the gene’s expression to the mean of that cluster.

mouse_clusters <- cluster2pattern(hclust_fit, NP = 2, Data = mouse_top250)

Projecting mouse clusters onto human data

#need to use the top 50 genes from mouse DE to pull those genes from human and then project cluster object from cluster2pattern onto human data 
top50_list <- rownames_to_column(mouse_top250, var = "SYMBOL")
human_list <- rownames_to_column(human_symbol_data_GO, var = "SYMBOL")
top50_list <- semi_join(human_list, top50_list, by = "SYMBOL")
mouseToHumanTop50 <- column_to_rownames(top50_list, var = "SYMBOL")
mouseToHumanTop50 <- mouseToHumanTop50[,5:62]
mouseToHumanTop50 <- as.matrix(mouseToHumanTop50)
cluster_projection <- projectR(mouseToHumanTop50, loadings = mouse_clusters, full = TRUE, dataNames = rownames(mouseToHumanTop50))
## [1] "250 row names matched between data and loadings"
## [1] "Updated dimension of data: 250 58"
#combining projection with metadata and preparing for PCA
cluster_projection_t <- t(cluster_projection[[1]])
cluster_projection_df <- cbind(cluster_projection_t, human_meta)
cluster_projection_plot <- ggplot(cluster_projection_df, aes(x = x1, y = x2, colour = PrePost)) + geom_point() + ggtitle("Mouse Clusters Projected onto Human Data")
cluster_projection_plot

ggsave("Mouse_Clusters_Projection.pdf", cluster_projection_plot)
## Saving 7 x 5 in image

Functional Enrichment Analysis

Mouse ggprofiler https://biit.cs.ut.ee/gprofiler/page/r
g:Profiler uses all the genes annotated to that term as an input (in this case about six hundred human genes associated to heart development). Fully numeric identifiers need to be prefixed with the corresponding namespace. g:Profiler will automatically prefix all the detected numeric IDs using the prefix determined by the selected numeric namespace parameter.
gost enables to perform functional profiling of gene lists. The function performs statistical enrichment analysis to find over-representation of functions from Gene Ontology, biological pathways like KEGG and Reactome, human disease annotations, etc. This is done with the hypergeometric test followed by correction for multiple testing.
In order to reduce the amount of false positives, a multiple testing correction method is applied to the enrichment p-values. By default, our tailor-made algorithm g:SCS is used (correction_method = “gSCS” with synonyms g_SCS and analytical), but there are also options to apply the Bonferroni correction (correction_method = “bonferroni”) or FDR (correction_method = “fdr”). The adjusted p-values are reported in the results.

Human FEA using gprofiler

#input needs to be a dataframe with genes and p values, so each pattern will need its own dataframe
#sample_factors has already been extracted from AP_human, need to do the same with the stdev
human_feature <- as.data.frame(AP_human@featureLoadings)
human_feature_stdev <- as.data.frame(AP_human@loadingStdDev)
pattern5_human <- dplyr::select(human_feature, Pattern_5) %>% cbind(., StdDev = human_feature_stdev$Pattern_5)
head(pattern5_human)
##           Pattern_5    StdDev
## DDX11L2  0.08131652 0.1860833
## DDX11L16 0.03840808 0.1213581
## DDX11L1  0.09497906 0.2578371
## DDX11L5  0.03780625 0.1084210
## DDX11L17 0.15602939 0.2845393
## WASH7P   0.11989398 0.2513702
pattern1_human <- dplyr::select(human_feature, Pattern_1) %>% cbind(., StdDev = human_feature_stdev$Pattern_1)
head(pattern1_human)
##          Pattern_1    StdDev
## DDX11L2  1.1809987 0.8131181
## DDX11L16 0.2364825 0.4237808
## DDX11L1  0.1353610 0.2749353
## DDX11L5  0.4262073 0.6837504
## DDX11L17 0.5320511 0.5834329
## WASH7P   1.0691203 0.6250860
#want to look at just the significant genes of pattern 5
pattern5_sig <- pattern5_human[pattern5_human$StdDev < 0.1,]
pattern1_sig <- pattern1_human[pattern1_human$StdDev < 0.1,]

pattern5_human_names <- rownames(pattern5_sig) %>% as.list()
names(pattern5_human_names) <- rownames(pattern5_sig)

Subsetting human data differently

human_feature_df <- rownames_to_column(human_feature, var = "Genes")
human_nmf_geneweight_long <- pivot_longer(data = human_feature_df, 
                         cols = -Genes,
                         names_to = "Pattern", 
                         values_to = "Gene_weight")
head(human_nmf_geneweight_long)
## # A tibble: 6 × 3
##   Genes   Pattern   Gene_weight
##   <chr>   <chr>           <dbl>
## 1 DDX11L2 Pattern_1      1.18  
## 2 DDX11L2 Pattern_2      0.142 
## 3 DDX11L2 Pattern_3      0.309 
## 4 DDX11L2 Pattern_4      0.0963
## 5 DDX11L2 Pattern_5      0.0813
## 6 DDX11L2 Pattern_6      0.278
human_pattern2 <- human_nmf_geneweight_long %>% dplyr::filter( Pattern == "Pattern_2") %>% slice_max(order_by = Gene_weight, n = 150)
human_pattern5 <- human_nmf_geneweight_long %>% dplyr::filter( Pattern == "Pattern_5") %>% slice_max(order_by = Gene_weight, n = 150)
human_pattern9 <- human_nmf_geneweight_long %>% dplyr::filter( Pattern == "Pattern_9") %>% slice_max(order_by = Gene_weight, n = 150)

human_pattern2_names <- as.list(human_pattern2$Genes)
names(human_pattern2_names) <- human_pattern2$Genes

human_pattern5_names <- as.list(human_pattern5$Genes)
names(human_pattern5_names) <- human_pattern5$Genes

human_pattern9_names <- as.list(human_pattern9$Genes)
names(human_pattern9_names) <- human_pattern9$Genes

Human gprofiler gost() runs

#look into exclude_iea and correction_methods
gost_res_2 <- gost(query =  human_pattern2_names, organism = "hsapiens", ordered_query = FALSE, significant = TRUE, exclude_iea = FALSE, measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", sources = c("REAC", "KEGG", "CORUM"), as_short_link = FALSE)
head(gost_res_2$result)
##    query significant    p_value term_size query_size intersection_size
## 1 ATP1A2        TRUE 0.04423077        23          1                 1
## 2 ATP2A1        TRUE 0.04994746         2          1                 1
## 3   CANX        TRUE 0.02497194         2          1                 1
## 4   CANX        TRUE 0.02497194         2          1                 1
## 5   GOT2        TRUE 0.01153846         6          1                 1
## 6   GOT2        TRUE 0.03269231        17          1                 1
##   precision     recall           term_id source
## 1         1 0.04347826        KEGG:04964   KEGG
## 2         1 0.50000000        CORUM:6514  CORUM
## 3         1 0.50000000 REAC:R-HSA-168268   REAC
## 4         1 0.50000000 REAC:R-HSA-168316   REAC
## 5         1 0.16666667        KEGG:00400   KEGG
## 6         1 0.05882353        KEGG:00360   KEGG
##                                             term_name effective_domain_size
## 1             Proximal tubule bicarbonate reclamation                  8000
## 2                                  SLN-ATP2A1 complex                  3627
## 3                          Virus Assembly and Release                 10622
## 4    Assembly of Viral Components at the Budding Site                 10622
## 5 Phenylalanine, tyrosine and tryptophan biosynthesis                  8000
## 6                            Phenylalanine metabolism                  8000
##   source_order           parents
## 1          377        KEGG:00000
## 2         2189     CORUM:0000000
## 3         2425 REAC:R-HSA-168255
## 4          164 REAC:R-HSA-168268
## 5           51        KEGG:00000
## 6           44        KEGG:00000
gost_res_5 <- gost(query =  human_pattern5_names, organism = "hsapiens", ordered_query = FALSE, significant = TRUE, exclude_iea = FALSE, measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", sources = c("REAC", "KEGG", "CORUM"), as_short_link = FALSE)
head(gost_res_5$result)
##      query significant    p_value term_size query_size intersection_size
## 1   ATP1A4        TRUE 0.04423077        23          1                 1
## 2     COQ6        TRUE 0.02115385        11          1                 1
## 3     DTNA        TRUE 0.04994746         2          1                 1
## 4     FXR1        TRUE 0.04994746         2          1                 1
## 5 HSP90AA1        TRUE 0.04994746         2          1                 1
## 6 HSP90AA1        TRUE 0.04994746         2          1                 1
##   precision     recall    term_id source
## 1         1 0.04347826 KEGG:04964   KEGG
## 2         1 0.09090909 KEGG:00130   KEGG
## 3         1 0.50000000 CORUM:7341  CORUM
## 4         1 0.50000000 CORUM:5360  CORUM
## 5         1 0.50000000 CORUM:4158  CORUM
## 6         1 0.50000000 CORUM:5716  CORUM
##                                             term_name effective_domain_size
## 1             Proximal tubule bicarbonate reclamation                  8000
## 2 Ubiquinone and other terpenoid-quinone biosynthesis                  8000
## 3                                CTNNAL1-DNTA complex                  3627
## 4                 AGO2-FXR1-TNF(alpha)ARE-RNP complex                  3627
## 5                     HSP90-FKBP38-CAM-Ca(2+) complex                  3627
## 6                    NOS3-HSP90 complex, VEGF induced                  3627
##   source_order       parents
## 1          377    KEGG:00000
## 2           17    KEGG:00000
## 3         2781 CORUM:0000000
## 4         1646 CORUM:0000000
## 5         1539 CORUM:0000000
## 6         1782 CORUM:0000000
gost_res_9 <- gost(query =  human_pattern9_names, organism = "hsapiens", ordered_query = FALSE, significant = TRUE, exclude_iea = FALSE, measure_underrepresentation = FALSE, evcodes = FALSE, 
                user_threshold = 0.05, correction_method = "g_SCS", 
                domain_scope = "annotated", custom_bg = NULL, 
                numeric_ns = "", sources = c("REAC", "KEGG", "CORUM"), as_short_link = FALSE)
head(gost_res_9$result)
##     query significant    p_value term_size query_size intersection_size
## 1   ATXN1        TRUE 0.04994746         2          1                 1
## 2 CYP27A1        TRUE 0.03269231        17          1                 1
## 3 CYP27A1        TRUE 0.01248597         1          1                 1
## 4    ESR2        TRUE 0.04994746         2          1                 1
## 5    FUT2        TRUE 0.02884615        15          1                 1
## 6    FUT2        TRUE 0.02497194         2          1                 1
##   precision     recall            term_id source
## 1         1 0.50000000         CORUM:7236  CORUM
## 2         1 0.05882353         KEGG:00120   KEGG
## 3         1 1.00000000 REAC:R-HSA-5578996   REAC
## 4         1 0.50000000         CORUM:6087  CORUM
## 5         1 0.06666667         KEGG:00603   KEGG
## 6         1 0.50000000 REAC:R-HSA-9033807   REAC
##                                                    term_name
## 1                                          ARXN1-CIC complex
## 2                             Primary bile acid biosynthesis
## 3                               Defective CYP27A1 causes CTX
## 4                                          ERbeta-AR complex
## 5 Glycosphingolipid biosynthesis - globo and isoglobo series
## 6                               ABO blood group biosynthesis
##   effective_domain_size source_order            parents
## 1                  3627         2711      CORUM:0000000
## 2                  8000           15         KEGG:00000
## 3                 10622          489 REAC:R-HSA-5579029
## 4                  3627         1880      CORUM:0000000
## 5                  8000           99         KEGG:00000
## 6                 10622            7 REAC:R-HSA-9033658

Data wrangling for human alluvial point

#need to add an identifier for pattern number for each dataframe
gost_pattern2 <- cbind(gost_res_2$result, pattern = c(rep("pattern_2")))
gost_pattern5 <- cbind(gost_res_5$result, pattern = c(rep("pattern_5")))
gost_pattern9 <- cbind(gost_res_9$result, pattern = c(rep("pattern_9")))

Human alluvial plot - Patterns 2, 5, and 9

#combine dataframes
gost_pattern259_all <- rbind(gost_pattern2, gost_pattern5, gost_pattern9)
#looking at just kegg results
gost_pattern259 <- dplyr::filter(gost_pattern259_all, source == "KEGG")

#gost_pattern259_table <- apply(gost_pattern259, 2, as.character)
#write.csv2(gost_pattern259_table, file = "gost_human_results_patterns259.csv")
ggalluvial_KEGG_259 <- ggplot(gost_pattern259,
       aes(axis1 = pattern, axis2 = term_name, y = query_size)) +
  geom_alluvium(aes(fill = term_name), width = 1/12) +
  geom_stratum(width = 1/24, fill = "black", color = "grey") +
 # geom_label(stat = "stratum", label.size = 0.15, aes(label = NULL, size = .5), nudge_x = -0.1) +
  scale_x_discrete(limits = c("Pattern", "Pathway"), expand = c(.05, .05)) +
  scale_colour_viridis_d(option = "magma") +
  labs(y = NULL) +
  theme(axis.text.x = element_text(size=14), axis.text.y = element_blank(), axis.title = element_text(size = 16), title = element_text(size = 18), legend.text = element_text(size = 14)) +
  theme(panel.grid = element_blank()) +
  theme(plot.margin = unit(c(1,.075,1,.5),"cm")) + #top, right, bottom, left
  guides(fill=guide_legend(title="KEGG Pathways"), col = guide_legend(nrow = 2)) + 
  ggtitle("Pre/Post Exercise-Associated \nPattern Pathways")
ggalluvial_KEGG_259

Alluvial Plot - Mouse
Looking at all gost results and just KEGG/Reactome/Corum

Alluvial Plot - Mouse
Looking just at gost profiler KEGG results

Alluvial Plot - Mouse
Looking just at gost profiler GO results

Versions

installed.packages()[names(sessionInfo()$otherPkgs), "Version"]
##              pvclust                  fpc         org.Mm.eg.db 
##              "2.2-0"              "2.2-9"             "3.12.0" 
##         org.Hs.eg.db             reshape2              viridis 
##             "3.12.0"              "1.4.4"              "0.6.1" 
##          viridisLite   EnsDb.Hsapiens.v86            ensembldb 
##              "0.4.0"             "2.99.0"             "2.14.1" 
##     AnnotationFilter      GenomicFeatures           gprofiler2 
##             "1.14.0"             "1.42.3"              "0.2.0" 
##              ggrepel           ggalluvial                tidyr 
##              "0.9.1"             "0.12.3"              "1.1.3" 
##              biomaRt             ggbiplot               scales 
##             "2.47.9"               "0.55"              "1.1.1" 
##                 plyr             devtools              usethis 
##              "1.8.6"              "2.4.2"              "2.0.1" 
##        AnnotationDbi               tibble                dplyr 
##             "1.52.0"              "3.1.3"              "1.0.7" 
##               gplots               DESeq2 SummarizedExperiment 
##              "3.1.1"             "1.30.1"             "1.20.0" 
##       MatrixGenerics          matrixStats        GenomicRanges 
##              "1.2.1"             "0.60.0"             "1.42.0" 
##         GenomeInfoDb              IRanges            S4Vectors 
##             "1.26.7"             "2.24.1"             "0.28.1" 
##              ggplot2               CoGAPS             projectR 
##              "3.3.5"             "3.10.0"              "1.6.0" 
##              Biobase         BiocGenerics 
##             "2.50.0"             "0.36.1"